(*---------------------------------------------------------------------------*
 * Hand rolled parsers for HOL types and terms.                              *
 *---------------------------------------------------------------------------*)
load "Substring";

fun failwith s = raise Fail (s^".\n");
fun quote s = String.concat ["\"",s,"\""];

val alphanumeric = Char.contains
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_'";

val symbolic = Char.contains "$#?+*/\\=<>&%@!:;|-~.";

(*---------------------------------------------------------------------------*
 * Types for indexes and locations: for finding things in the input.         *
 *---------------------------------------------------------------------------*)
type index = int  (* index in the underlying string. *)
type 'a location = 'a * index;

datatype lexeme = lparen of index
                | rparen of index
                | lbrack of index
                | rbrack of index
                | lbrace of index
                | rbrace of index
                | ident  of string location
                | symbl  of string location
                | bindr  of string location
                | qident of (string list * string) location
                | qsymbl of (string list * string) location
                | qbindr of (string list * string) location
                | stringeme of string location;
(*
                | aq  of Term.term
*)



(*---------------------------------------------------------------------------*
 * Consume a potentially nested comment. The leading two characters of the   *
 * comment have already been seen.                                           *
 *---------------------------------------------------------------------------*)
local fun dropchar #"*" = false
        | dropchar #"(" = false
        | dropchar   _  = true
in
fun comment ss0 =
 let val ss1 = Substring.dropl dropchar ss0
 in 
  case Substring.getc ss1
   of NONE            => NONE  (* At end of string without seeing a dropchar *)
    | SOME (#"(",ss2) =>       (* recursive comment imposes structure! *)
        (case Substring.getc ss2
          of NONE => NONE
           | SOME(#"*",ss3) => 
                 (case comment ss3
                   of NONE     => NONE
                    | SOME ss4 => comment ss4)
                    | SOME _   => comment ss2)
    | SOME (#"*",ss2) => 
        (case Substring.getc ss2
          of NONE           => NONE
           | SOME(#")",ss3) => SOME ss3  (* Found EOC *)
           | SOME        _  => comment ss2)
    | SOME _ => failwith "comment: case should be unreachable"
 end 
end;

fun bump n ss = 
  let val (s,i,j) = Substring.base ss
  in Substring.substring(s,i,j+n)
  end;


(*---------------------------------------------------------------------------*
 * This is called when we know that ss[0] is alphanumeric, and now we want   *
 * to get the whole thing. This function will also build dotted identifiers. *
 * A dotted ident can end in either an alphanumeric or a symbolic.           *
 *---------------------------------------------------------------------------*)
fun get_alphanumeric (ssl,ssr) =
  let val (ss1,ss2) = Substring.splitl alphanumeric ssr
      val ssl' = Substring.span (ssl,ss1)
  in case Substring.getc ss2
      of NONE => SOME(ssl',ss2)
       | SOME (#".",ss3) => 
           (case Substring.getc ss3
             of NONE         => SOME(ssl',ss2)
              | SOME (c,_) => 
                  if (alphanumeric c) 
                  then get_alphanumeric (bump 1 ssl', ss3)
                  else if (symbolic c) 
                       then let val (ss4,ss5) = Substring.splitl symbolic ss3
                            in SOME(Substring.span (bump 1 ssl', ss4),ss5)
                            end
                  else SOME(ssl',ss2))
       | _ => SOME(ssl',ss2)
  end;


(*---------------------------------------------------------------------------*
 * The basic lexer.                                                          *
 *---------------------------------------------------------------------------*)
fun syslex ss0 =
 let val ss1 = Substring.dropl Char.isSpace ss0
     val (s,i,_) = Substring.base ss1
 in
  case Substring.getc ss1
   of NONE         => NONE
    | SOME (c,ss2) => 
       case c 
        of #"[" => SOME (lbrack i, ss2)
         | #"]" => SOME (rbrack i, ss2)
         | #"{" => SOME (lbrace i, ss2)
         | #"}" => SOME (rbrace i, ss2)
         | #")" => SOME (rparen i, ss2)
         | #"(" => (case Substring.getc ss2 
                     of SOME (#"*",ss3) => 
                          (case comment ss3
                            of NONE     => NONE  
                             | SOME ss4 => syslex ss4)
                      | _ => SOME (lparen i,ss2))
         | otherwise =>
            if (alphanumeric c) 
            then let val pref = Substring.substring(s,i,1)
                     val opt = get_alphanumeric (pref, ss2)
                 in 
                  case opt 
                   of NONE => NONE
                    | SOME (first,rst) => 
                        SOME(ident (Substring.string first, i), rst)
                 end
            else if (symbolic c) 
                 then let val (ss3,ss4) = Substring.splitl symbolic ss1
                      in SOME (symbl (Substring.string ss3, i), ss4)
                      end
                 else failwith ("Character not recognized in system lexer: "
                                ^Char.toString c)
  end;

(*---------------------------------------------------------------------------*
 * A lexer that is parameterized by an override lexer. The override is tried *
 * first. If that fails, then syslex is called.                              *
 *---------------------------------------------------------------------------*)

fun prelex override ss = 
   override ss 
   handle Interrupt => raise Interrupt
        |        _  => syslex ss;

 
local exception NO_OVERRIDE
in
fun null_override ss = raise NO_OVERRIDE
end;

val override = ref (null_override : substring -> (lexeme * substring) option);

(*---------------------------------------------------------------------------*
 * The user visible lexer.                                                   *
 *---------------------------------------------------------------------------*)
fun lex ss = prelex (!override) ss;


fun lex2list lexr s =
  let fun l2l ss L =
        case (lexr ss)
         of NONE          => rev L
          | SOME (s, ss') => l2l ss' (s::L)
  in 
    l2l (Substring.all s) []
  end;
     


(*---------------------------------------------------------------------------*
 * Parsing.                                                                  *
 *---------------------------------------------------------------------------*)

datatype ('a,'type) pretype 
     = tyIdent of 'a
     | tyApp  of ('a,'type)pretype * ('a,'type)pretype list
     | tyAntiq of 'type;

fun typarse lexr ss =
 let fun prs_type stk ss =
      case lexr ss
       of NONE => failwith "typarse.prs_type: unexpected end of input"
        | SOME (lparen i, rst) => 
           let val (stk,rst') = prs_type stk rst
           in 
              case (lexr rst')
               of NONE => failwith"typarse.prs_type: unexpected end of input\
                           \ when looking for a \")\""
                | SOME (rparen i, rst'')      => (stk,rst'')
                | SOME (ident (s,i), rst'')   => ???
                | SOME (symbl (",",i), rst'') => 
                   (case stk
                     of h::t => (case (prs_typel [h] rst'')
                                  of ([x],rst3) => typarse (x::t) rst3
                                   | _ => failwith "error in paren exp.")
                      | [] => failwith "no type before comma")
           end
        | SOME (rparen i, rst) => 
            failwith ("typarse.prs_type: unexpected \")\" at character "
                      ^Int.toString i")

        | SOME (ident p, rst) => 
            let val tyid = tyIdent p
            in case stk 
                of []   => prs_type [tyid] rst
                 | h::t => prs_type (tyApp(tyid,[h])::t) rst
            end
        | SOME (symbl (s,i),rst') => 
           (case s 
             of "->" => 
                 let val (stk,rst'') = prs_type stk rst'
                 in case stk
                     of (h1::h2::t) => 
                         prs_type (tyApp(tyIdent("->",i),[h1,h2])::t) rst''
                      | _ => failwith "too few components for arrow"
                 end
              | "+"  => failwith "not implemented"
              | "#"  => failwith "not implemented"
              |  _   => failwith ("typarse.prs_type: expected \"->\""
                                   ^" or \"+\" or \"#\" at character "
                                   ^Int.toString i"
                                   ^" instead of "^quote s))
        | SOME (thing, rst) => failwith "unknown lexeme"

     and 
     prs_typel stk ss =
       let val (stk,rst') = prs_type stk rst
       in 
         case (lexr rst')
          of NONE => failwith"typarse.prs_type: unexpected end of input\
                           \ when looking for a \")\""
           | SOME (rparen i, rst'') => 
              (case (lexr rst'')
                of NONE => failwith"typarse.prs_typel: unexpected end of input\
                           \ when looking for an identifier"
                 | SOME (ident p, rst3) => ([tyApp(tyIdent p,stk)], rst3)
                 | SOME (symbl (",",i), rst3) => prs_typel stk rst3
                 | SOME _ => failwith "typarse.prs_typel: looking for ident.\
                                      \ or comma, but none found")
       end
  in
   prs_type [] ss
  end;


datatype ('a,'term,'type) preterm 
     = Ident of 'a
     | Comb  of ('a,'term,'type)preterm * ('a,'term,'type)preterm
     | Abs   of ('a,'term,'type)preterm * ('a,'term,'type)preterm
     | Antiq of 'term
     | Constrained of ('a,'term,'type)preterm * 'type;


end;
